taxa <- read_xls("Grazing_Magierowski_et_al_2015.xls", sheet = 1)[,1]
fauna <- read_xls("Grazing_Magierowski_et_al_2015.xls", sheet = 2)
env <- read_xls("Grazing_Magierowski_et_al_2015.xls", sheet = 3)
coord <- read_xls("Grazing_Magierowski_et_al_2015.xls", sheet = 4)
raw <- read_xls("Grazing_Magierowski_et_al_2015.xls", sheet = 5)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
env %>% summary
## SITE Abstraction Regulation
## Length:27 Min. :0.000000 Min. :0.000000
## Class :character 1st Qu.:0.000290 1st Qu.:0.000415
## Mode :character Median :0.002640 Median :0.007890
## Mean :0.009466 Mean :0.014375
## 3rd Qu.:0.013140 3rd Qu.:0.020330
## Max. :0.043370 Max. :0.068820
##
## Grazing (proportion of total catchment area) fines (proportion substrata)
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.02982 1st Qu.:0.0000
## Median :0.21808 Median :0.0500
## Mean :0.23595 Mean :0.1296
## 3rd Qu.:0.37661 3rd Qu.:0.1000
## Max. :0.77846 Max. :1.0000
##
## Temperature (oC) Conductivity (uS/cm) average turbidity (NTU) pH
## Min. : 9.50 Min. : 20.0 Min. : 0.160 Min. :5.100
## 1st Qu.:11.35 1st Qu.: 67.5 1st Qu.: 1.725 1st Qu.:6.900
## Median :13.70 Median : 99.0 Median : 2.970 Median :7.100
## Mean :14.17 Mean :136.0 Mean : 3.264 Mean :7.192
## 3rd Qu.:17.20 3rd Qu.:168.5 3rd Qu.: 3.840 3rd Qu.:7.500
## Max. :21.00 Max. :481.0 Max. :10.470 Max. :9.100
## NA's :3 NA's :2
## Alkalinity Total (mg CaCO3/L) Nitrate+Nitrite (mg-N/L) DRP (mg-P/L)
## Min. : 2.00 Min. :0.0020 Min. :0.0020
## 1st Qu.: 10.00 1st Qu.:0.0300 1st Qu.:0.0020
## Median : 18.00 Median :0.0890 Median :0.0030
## Mean : 37.92 Mean :0.1538 Mean :0.0068
## 3rd Qu.: 33.00 3rd Qu.:0.2330 3rd Qu.:0.0040
## Max. :196.00 Max. :0.8200 Max. :0.0410
## NA's :2 NA's :2 NA's :2
## N total (mg-N/L) P Total (mg-P/L) Average % shading Average algae cover (%)
## Min. :0.0500 Min. :0.00700 Min. : 0.00 Min. : 0.000
## 1st Qu.:0.2600 1st Qu.:0.01300 1st Qu.:42.50 1st Qu.: 4.833
## Median :0.3900 Median :0.01700 Median :54.25 Median :17.333
## Mean :0.4384 Mean :0.03308 Mean :55.91 Mean :28.195
## 3rd Qu.:0.5700 3rd Qu.:0.02600 3rd Qu.:80.79 3rd Qu.:40.567
## Max. :1.3000 Max. :0.21600 Max. :90.00 Max. :97.667
## NA's :2 NA's :2
## Chl a (mg/m2) GrazingRank
## Min. : 2.180 Length:27
## 1st Qu.: 7.486 Class :character
## Median :15.515 Mode :character
## Mean :21.370
## 3rd Qu.:30.486
## Max. :90.640
##
env <- env %>%
mutate_if(is.character, as.factor)
featurePlot(x = env[,-c(1,10:17,18)],
y = env$GrazingRank,
plot = "box",
scales = list(y = list(relation="free"),
x = list(rot = 90)),
layout = c(4,2),
auto.key = list(columns = 3))
featurePlot(x = env[,-c(1,1:9,18)],
y = env$GrazingRank,
plot = "box",
scales = list(y = list(relation="free"),
x = list(rot = 90)),
layout = c(4,2),
auto.key = list(columns = 3))
We see several outliers in medium category and one in severe. To sum up, there are a lot of correllated features with grazing, including some chemical parameters, temperature, conductivity, regulation.
So I can assume that they are under the
gg <- env %>%
left_join(coord,by = "SITE") %>%
ggplot(aes(lon, lat, color = GrazingRank, text = SITE)) +
geom_point() +
theme_minimal() +
scale_color_viridis_d("Grazing level")
ggplotly(gg)
Places with different grazing levels are not equally distributed.
gg <- fauna %>%
pivot_longer(cols = -1) %>%
mutate(value = value %>% as.numeric()) %>%
ggplot(aes(y = SITE, value)) +
geom_violin(show.legend = F, aes(fill = SITE)) +
geom_jitter(show.legend = F, aes(color = name, text = name)) +
theme_minimal()
## Warning in geom_jitter(show.legend = F, aes(color = name, text = name)):
## Ignoring unknown aesthetics: text
ggplotly(gg) %>%
style(showlegend = FALSE)
normalize <- function(x, na.rm = TRUE) {
#return((x- min(x)) /(max(x)-min(x)))
MX <- min(x)
x <- log10(x+1-MX)
#x - mean(x)
}
fauna_norm <- fauna
fauna_norm[,-1] <- fauna[,-1] %>%
apply(2, normalize) %>%
apply(2, as.numeric)
fauna_norm[is.na(fauna_norm)] <- 0
gg <- fauna_norm %>%
pivot_longer(cols = -1) %>%
mutate(value = value %>% as.numeric()) %>%
ggplot(aes(y = SITE, value)) +
geom_violin(show.legend = F, aes(fill = SITE)) +
geom_jitter(show.legend = F, aes(color = name, text = name), size = .2, alpha = .2) +
theme_minimal()
## Warning in geom_jitter(show.legend = F, aes(color = name, text = name), :
## Ignoring unknown aesthetics: text
ggplotly(gg) %>%
style(showlegend = FALSE)
OK, now much better.
OK, let’s calculate differences between species based on diversity.
c1 <- env$GrazingRank
c1 <- c1 %>% match(unique(c1))
c1 <- viridis(length(unique(c1)))[c1]
fauna %>%
column_to_rownames("SITE") %>%
vegdist(method = "bray") %>%
as.matrix() %>%
heatmap(ColSideColors = c1)
Bray-Curtis beta-diversity is not correlated highly with grazing. So, move on
#NMDS
fauna_norm <- fauna_norm %>%
column_to_rownames("SITE")
NMDS <- fauna_norm %>%
metaMDS(k = 2, dist = "bray")
## Run 0 stress 0.1424228
## Run 1 stress 0.3858432
## Run 2 stress 0.1542763
## Run 3 stress 0.1424216
## ... New best solution
## ... Procrustes: rmse 0.0005240125 max resid 0.002036761
## ... Similar to previous best
## Run 4 stress 0.1410027
## ... New best solution
## ... Procrustes: rmse 0.09555338 max resid 0.364761
## Run 5 stress 0.1528042
## Run 6 stress 0.1430179
## Run 7 stress 0.3747183
## Run 8 stress 0.143018
## Run 9 stress 0.1424851
## Run 10 stress 0.1443807
## Run 11 stress 0.1410045
## ... Procrustes: rmse 0.004164394 max resid 0.01729042
## Run 12 stress 0.1550956
## Run 13 stress 0.1435521
## Run 14 stress 0.1421342
## Run 15 stress 0.1443804
## Run 16 stress 0.15604
## Run 17 stress 0.154891
## Run 18 stress 0.1423213
## Run 19 stress 0.1533854
## Run 20 stress 0.1421343
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
stressplot(NMDS)
ef <- envfit(NMDS, env %>% left_join(coord, by = "SITE"), na.rm = T)
ef$vectors %>% print
## NMDS1 NMDS2 r2 Pr(>r)
## Abstraction -0.82294 0.56813 0.4291 0.004
## Regulation -0.76282 0.64661 0.3923 0.010
## Grazing (proportion of total catchment area) -0.50152 0.86515 0.4395 0.003
## fines (proportion substrata) -0.99339 -0.11479 0.0646 0.447
## Temperature (oC) -0.66498 0.74686 0.1311 0.224
## Conductivity (uS/cm) -0.79826 0.60232 0.2592 0.056
## average turbidity (NTU) -0.97902 0.20374 0.1084 0.322
## pH 0.30932 0.95096 0.0082 0.914
## Alkalinity Total (mg CaCO3/L) -0.53777 0.84309 0.2166 0.094
## Nitrate+Nitrite (mg-N/L) 0.61578 0.78792 0.0557 0.502
## DRP (mg-P/L) 0.03841 -0.99926 0.0061 0.918
## N total (mg-N/L) -0.92571 0.37823 0.0097 0.899
## P Total (mg-P/L) -0.34696 -0.93788 0.0208 0.731
## Average % shading 0.06345 -0.99798 0.2142 0.074
## Average algae cover (%) -0.94533 0.32611 0.2578 0.043
## Chl a (mg/m2) -0.41929 -0.90785 0.0146 0.866
## northing_GDA94 0.09878 -0.99511 0.1760 0.138
## easting_GDA94 -0.99160 0.12937 0.0849 0.398
## lon 0.09885 -0.99510 0.1767 0.137
## lat -0.98725 0.15916 0.0815 0.413
##
## Abstraction **
## Regulation **
## Grazing (proportion of total catchment area) **
## fines (proportion substrata)
## Temperature (oC)
## Conductivity (uS/cm) .
## average turbidity (NTU)
## pH
## Alkalinity Total (mg CaCO3/L) .
## Nitrate+Nitrite (mg-N/L)
## DRP (mg-P/L)
## N total (mg-N/L)
## P Total (mg-P/L)
## Average % shading .
## Average algae cover (%) *
## Chl a (mg/m2)
## northing_GDA94
## easting_GDA94
## lon
## lat
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
cat("##################################\n")
## ##################################
ef$factors %>% print
## Centroids:
## NMDS1 NMDS2
## SITEAnsons river -0.1173 -0.2773
## SITEBlack river 0.0428 -0.2361
## SITEBoobyalla river -0.2440 -0.0930
## SITEDans rivulet 0.4926 -0.3895
## SITEDeep creek 0.0290 0.2207
## SITEDip river -0.0508 0.3381
## SITEDon river -0.5586 0.1162
## SITEDorset river 0.5378 -0.0821
## SITEDuck river 0.1013 0.0981
## SITEEdith creek -0.2384 0.1009
## SITEFord river 0.1788 -0.0367
## SITEGreat Forester river 0.2875 -0.0652
## SITEInglis river 0.0620 0.0641
## SITEMeander river 1.3189 0.2008
## SITEMelin river -0.0112 -0.3972
## SITEPatersonia rivulet -0.0388 -0.1219
## SITEPenguin creek -0.1269 0.0476
## SITEQuamby brook -0.1534 -0.0692
## SITERubicon river -1.1346 0.0863
## SITESalisburycreek -0.3936 -0.0227
## SITESeabrook creek -0.3391 0.1333
## SITESecond river -0.5737 -0.0668
## SITESt Patricks river 0.1687 -0.0039
## SITEWilsons creek 0.0738 -0.1159
## GrazingRankLow 0.2382 -0.0876
## GrazingRankMedium -0.0319 -0.0480
## GrazingRankSevere -0.4483 0.1315
##
## Goodness of fit:
## r2 Pr(>r)
## SITE 1.0000 1.000
## GrazingRank 0.2904 0.014 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
A significant fit to GrazingRank, as well as catching influence of Abstraction, Regulation, Conductivity and some other factors.
NMDS_sm <- data.frame(env %>% left_join(coord, by = "SITE"), scores(NMDS, display = "sites"))
NMDS_sm[,-1] %>%
mutate_if(is.character, as.factor) %>%
mutate_if(is.factor, as.numeric) %>%
cor(use='complete.obs', method = "spearman") %>%
corrplot(tl.cex = .5, order = 'FPC')
If the 1st NMDS component have a lot of factors which can describe it, 2nd have not such a strong correllations - only with Average_shading. So, 1st could be described by Grazing Rank and correlated features. while 2nd by different features complex, main of them are total alkalinity, average shading and longitude.
NMDS_ef <- fortify(ef)
fortify_ordisurf <- function(model) {
xy <- expand.grid(x = model$grid$x, y = model$grid$y)
xyz <- cbind(xy, c(model$grid$z))
names(xyz) <- c("x", "y", "z")
return(na.omit(xyz))
}
fortify_ordisurf <- function(model) {
xy <- expand.grid(x = model$grid$x, y = model$grid$y)
xyz <- cbind(xy, c(model$grid$z))
names(xyz) <- c("x", "y", "z")
return(na.omit(xyz))
}
osf <- function(var) {
env <- env %>% left_join(coord, by = "SITE")
var_mn <- env[,var] %>% unlist %>% as.factor %>% as.numeric
ordisurf(NMDS, var_mn, method = "REML", main = var) %>%
fortify_ordisurf()
}
NMDS_os_GR <- osf("GrazingRank")
NMDS_os_Ab <- osf("Abstraction")
NMDS_os_Rg <- osf("Regulation")
NMDS_os_At <- osf("Alkalinity Total (mg CaCO3/L)")
NMDS_os_As <- osf("Average % shading")
NMDS_os_lon <- osf("lon")
gg <- ggplot() +
geom_point(data = NMDS_sm,
aes(x = NMDS1, y = NMDS2, text = 1:27, shape = GrazingRank)) +
stat_contour(data = NMDS_os_lon, aes(x = x, y = y, z = z, colour = ..level..)) +
geom_segment(data = NMDS_ef[41:43,], linewidth = 0.25,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2, text = label),
color = c("cadetblue", "darkgreen","darkred"),
arrow = arrow(length = unit(0.25, "cm"))) +
labs(x = "NMDS1", y = "NMDS2") +
theme_minimal() +
scale_shape_discrete(name = "Grazing Rank") +
scale_color_viridis("longtitude")
## Warning in geom_point(data = NMDS_sm, aes(x = NMDS1, y = NMDS2, text = 1:27, :
## Ignoring unknown aesthetics: text
## Warning in geom_segment(data = NMDS_ef[41:43, ], linewidth = 0.25, aes(x = 0, :
## Ignoring unknown aesthetics: text
ggplotly(gg)
## Warning: The dot-dot notation (`..level..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(level)` instead.
## ℹ The deprecated feature was likely used in the ggplot2 package.
## Please report the issue at <https://github.com/tidyverse/ggplot2/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
OK! now we really now what to find on PCA and CCA
center <- function(x) (x - mean(x))/sd(x)
# remove 0 columns
fauna_norm <- fauna_norm[, apply(fauna_norm, 2, mean) > 0]
res.pca <- fauna_norm %>%
#apply(2,center) %>%
prcomp()
groups <- env$GrazingRank
gg <- fviz_pca_ind(res.pca,axes = c(1,2),
col.ind = groups, # color by groups
palette = c("#00AFBB", "#FC4E07", "yellow"),
addEllipses = TRUE, # Concentration ellipses
ellipse.type = "confidence",
legend.title = "Groups",
repel = TRUE
)
ggplotly(gg)
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
Very low % of the explained data, but as on NMDS we can see grouping by grazing level.
df_ca <- cca(fauna_norm)
df_ca$CA$eig %>%
as.data.frame() %>%
rownames_to_column("CA") %>%
mutate(CA = CA %>% str_remove("CA") %>% as.numeric()) %>%
ggplot(aes(CA, .)) +
geom_col() +
ylab("inertia") +
theme_minimal()
CA_site <- df_ca$CA$u[,1:2] %>% cbind(env) %>% cbind(coord[,-1])
CA_sp <- df_ca$CA$v[,1:2] %>%
as.data.frame() %>% rownames_to_column("sp")
gg <- ggplot() +
geom_point(data = CA_sp,
aes(CA1, CA2, text = sp),
shape = 3, size = .5) +
geom_point(data = CA_site,
aes(CA1, CA2, text = SITE,
color = GrazingRank)) +
theme_minimal()
## Warning in geom_point(data = CA_sp, aes(CA1, CA2, text = sp), shape = 3, :
## Ignoring unknown aesthetics: text
## Warning in geom_point(data = CA_site, aes(CA1, CA2, text = SITE, color =
## GrazingRank)): Ignoring unknown aesthetics: text
ggplotly(gg)
# remove NA from CCA
env_full <- env %>%
left_join(coord, by = "SITE") %>%
select_if(~ !any(is.na(.)))
df_cca <- cca(fauna_norm ~ .,
data = env_full[,-1])
df_cca$CA$eig %>%
as.data.frame() %>%
rownames_to_column("CA") %>%
mutate(CA = CA %>% str_remove("CA") %>% as.numeric()) %>%
ggplot(aes(CA, .)) +
geom_col() +
ylab("inertia") +
theme_minimal()
vif.cca(df_cca) %>%
as.data.frame() %>%
rownames_to_column("tp") %>%
mutate(tp = fct_reorder(tp, ., .desc = T) ) %>%
ggplot() +
geom_col(aes(log10(.+1), tp), fill = "blue") +
theme_minimal() + ylab("") + xlab("log10 of variance inflation factor")
Let’s remove identical features with low fit and re-trim the model
env_full_model <- env_full[,-c(1,4,12:13)]
df_cca <- cca(fauna_norm ~ .,
data = env_full_model) %>%
ordistep
##
## Start: fauna_norm ~ Abstraction + Regulation + `fines (proportion substrata)` + `Temperature (oC)` + `Conductivity (uS/cm)` + `Average % shading` + `Average algae cover (%)` + `Chl a (mg/m2)` + GrazingRank + lon + lat
##
## Df AIC F Pr(>F)
## - `Average algae cover (%)` 1 127.96 0.7642 0.880
## - `Conductivity (uS/cm)` 1 128.17 0.8822 0.625
## - `Chl a (mg/m2)` 1 128.24 0.9218 0.590
## - GrazingRank 2 128.08 0.9866 0.565
## - lat 1 128.38 0.9962 0.440
## - `Average % shading` 1 128.45 1.0381 0.400
## - Abstraction 1 128.41 1.0116 0.390
## - Regulation 1 128.55 1.0920 0.260
## - `fines (proportion substrata)` 1 128.80 1.2308 0.120
## - `Temperature (oC)` 1 128.82 1.2418 0.105
## - lon 1 129.38 1.5625 0.020 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Abstraction + Regulation + `fines (proportion substrata)` + `Temperature (oC)` + `Conductivity (uS/cm)` + `Average % shading` + `Chl a (mg/m2)` + GrazingRank + lon + lat
##
## Df AIC F Pr(>F)
## - `Conductivity (uS/cm)` 1 127.66 0.9753 0.470
## - lat 1 127.76 1.0372 0.390
## - Abstraction 1 127.84 1.0816 0.380
## - `Chl a (mg/m2)` 1 127.80 1.0567 0.325
## - Regulation 1 127.99 1.1725 0.285
## - `Average % shading` 1 128.01 1.1845 0.235
## - GrazingRank 2 127.68 1.1096 0.210
## - `fines (proportion substrata)` 1 128.16 1.2735 0.150
## - `Temperature (oC)` 1 128.14 1.2643 0.100 .
## - lon 1 128.67 1.5835 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Abstraction + Regulation + `fines (proportion substrata)` + `Temperature (oC)` + `Average % shading` + `Chl a (mg/m2)` + GrazingRank + lon + lat
##
## Df AIC F Pr(>F)
## - lat 1 127.36 1.0413 0.365
## - Abstraction 1 127.50 1.1266 0.270
## - Regulation 1 127.55 1.1613 0.230
## - `fines (proportion substrata)` 1 127.66 1.2317 0.205
## - GrazingRank 2 127.16 1.1079 0.195
## - `Chl a (mg/m2)` 1 127.47 1.1085 0.190
## - `Average % shading` 1 127.62 1.2029 0.095 .
## - `Temperature (oC)` 1 127.84 1.3464 0.060 .
## - lon 1 128.19 1.5704 0.020 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Abstraction + Regulation + `fines (proportion substrata)` + `Temperature (oC)` + `Average % shading` + `Chl a (mg/m2)` + GrazingRank + lon
##
## Df AIC F Pr(>F)
## - `Chl a (mg/m2)` 1 127.07 1.1119 0.240
## - Abstraction 1 127.09 1.1270 0.235
## - GrazingRank 2 126.63 1.0947 0.225
## - `Average % shading` 1 127.21 1.2048 0.175
## - `fines (proportion substrata)` 1 127.25 1.2314 0.125
## - Regulation 1 127.20 1.1981 0.100 .
## - `Temperature (oC)` 1 127.81 1.6153 0.010 **
## - lon 1 128.12 1.8301 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Abstraction + Regulation + `fines (proportion substrata)` + `Temperature (oC)` + `Average % shading` + GrazingRank + lon
##
## Df AIC F Pr(>F)
## - Abstraction 1 126.70 1.1210 0.230
## - Regulation 1 126.80 1.1867 0.210
## - `fines (proportion substrata)` 1 126.80 1.1871 0.200
## - `Average % shading` 1 126.80 1.1923 0.155
## - GrazingRank 2 126.28 1.1374 0.120
## - `Temperature (oC)` 1 127.29 1.5428 0.010 **
## - lon 1 127.52 1.7106 0.005 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Regulation + `fines (proportion substrata)` + `Temperature (oC)` + `Average % shading` + GrazingRank + lon
##
## Df AIC F Pr(>F)
## - `fines (proportion substrata)` 1 126.18 1.0703 0.255
## - `Average % shading` 1 126.35 1.1936 0.145
## - GrazingRank 2 125.77 1.1428 0.130
## - `Temperature (oC)` 1 126.82 1.5482 0.015 *
## - lon 1 127.03 1.7108 0.010 **
## - Regulation 1 127.26 1.8857 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Regulation + `Temperature (oC)` + `Average % shading` + GrazingRank + lon
##
## Df AIC F Pr(>F)
## - `Average % shading` 1 125.64 1.1119 0.250
## - GrazingRank 2 125.01 1.1040 0.200
## - `Temperature (oC)` 1 126.12 1.4888 0.025 *
## - lon 1 126.33 1.6574 0.015 *
## - Regulation 1 126.49 1.7843 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: fauna_norm ~ Regulation + `Temperature (oC)` + GrazingRank + lon
##
## Df AIC F Pr(>F)
## - GrazingRank 2 124.59 1.2128 0.06 .
## - `Temperature (oC)` 1 125.69 1.6547 0.01 **
## - lon 1 125.84 1.7815 0.01 **
## - Regulation 1 125.96 1.8835 0.01 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
df_cca
## Call: cca(formula = fauna_norm ~ Regulation + `Temperature (oC)` +
## GrazingRank + lon, data = env_full_model)
##
## Inertia Proportion Rank
## Total 2.5727 1.0000
## Constrained 0.7209 0.2802 5
## Unconstrained 1.8517 0.7198 21
## Inertia is scaled Chi-square
##
## Eigenvalues for constrained axes:
## CCA1 CCA2 CCA3 CCA4 CCA5
## 0.26551 0.15678 0.11502 0.09777 0.08584
##
## Eigenvalues for unconstrained axes:
## CA1 CA2 CA3 CA4 CA5 CA6 CA7 CA8
## 0.19980 0.17845 0.15066 0.13585 0.11915 0.11597 0.10305 0.09697
## (Showing 8 of 21 unconstrained eigenvalues)
~70% of unconstrained inertia is high
df_cca$CA$eig %>%
as.data.frame() %>%
rownames_to_column("CA") %>%
mutate(CA = CA %>% str_remove("CA") %>% as.numeric()) %>%
ggplot(aes(CA, .)) +
geom_col() +
ylab("inertia") +
theme_minimal()
vif.cca(df_cca) %>%
as.data.frame() %>%
rownames_to_column("tp") %>%
mutate(tp = fct_reorder(tp, ., .desc = T) ) %>%
ggplot() +
geom_col(aes(log10(.+1), tp), fill = "blue") +
theme_minimal() + ylab("") + xlab("log10 of variance inflation factor")
intersetcor(df_cca) %>%
as.data.frame() %>%
rownames_to_column("tp") %>%
mutate(tp = fct_reorder(tp, CCA1, .desc = T) ) %>%
pivot_longer(cols = -1) %>%
ggplot() +
geom_boxplot(aes(value, tp), color = "blue") +
theme_minimal() + ylab("") + xlab("interset correllation")
intersetcor(df_cca) %>%
as.data.frame() %>%
rownames_to_column("tp") %>%
mutate(tp = fct_reorder(tp, CCA1, .desc = T) ) %>%
pivot_longer(cols = -1) %>%
ggplot() +
geom_col(aes(value, tp), fill = "blue") +
facet_grid(~name) +
theme_minimal(base_size = 7) + ylab("") + xlab("interset correllation")
anova(df_cca, type = "term")
## Permutation test for cca under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: cca(formula = fauna_norm ~ Regulation + `Temperature (oC)` + GrazingRank + lon, data = env_full_model)
## Df ChiSquare F Pr(>F)
## Model 5 0.72091 1.6351 0.001 ***
## Residual 21 1.85175
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Well-fitting model)
df_cca %>%
plot(scaling = "sites")
df_cca %>%
plot(scaling = 2, display = c("species", "cn"))
Main influencing features seems to be temperature, regulation, longtitude and grazing.
df_splsda <- fauna_norm %>% splsda(env$GrazingRank %>% as.factor)
plotIndiv(df_splsda)
sPLS-DA predict <16% of all differences explanations related to grazing
#GLMs
Last: who are the best indicators for grazing?
Simplest model - calculate univariate regressions between grazing and amounts; as a fit let’s use R2 and PR(z>0)
X <- env$GrazingRank %>% ordered
GR_fit <- function(Y) {
tmp <- glm(X~Y, family = "binomial")
tmp2 <- tmp %>% summary
a <- with(tmp, 1 - deviance/null.deviance)
b <- tmp2[["coefficients"]][2,"Pr(>|z|)"]
return(c(a,b))
}
R2 <- fauna_norm %>%
apply(2, GR_fit) %>%
as.data.frame %>% t %>%
as.data.frame() %>%
rownames_to_column("sp")
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
## Warning: glm.fit: возникли подогнанные вероятности 0 или 1
gg <- R2 %>%
ggplot(aes(V1, -V2, text = sp, color = V1)) +
geom_point() +
theme_minimal() +
xlab("R2") + ylab("-Pr(|z|>0)")
ggplotly(gg)
There are a few fitted species, but overall R2 value is pretty low - < 0.25. So, like by CCA, it is not the major factor, but one of influencing, in complex with other factors.